library(readxl)
library(dplyr)
library(plyr)
library(Hmisc)

#reading data

df <- read_excel('Data_Extract_From_Enterprise_Surveys.xlsx')

#controlling data

str(df)
tibble[,19] [16,795 × 19] (S3: tbl_df/tbl/data.frame)
 $ Country Name : chr [1:16795] "Afghanistan" "Afghanistan" "Afghanistan" "Afghanistan" ...
 $ Country Code : chr [1:16795] "AFG" "AFG" "AFG" "AFG" ...
 $ Series Name  : chr [1:16795] "Age of the establishment (years)" "Annual employment growth (%)" "Bribery depth (% of public transactions where a gift or informal payment was requested)" "Bribery incidence (percent of firms experiencing at least one bribe payment request)" ...
 $ Series Code  : chr [1:16795] "IC.FRM.FCHAR.CAR1" "IC.FRM.EMP.GROW.PEFT2" "IC.FRM.BRIB.GRAFT2" "IC.FRM.BRIB.GRAFT3" ...
 $ 2006 [YR2006]: chr [1:16795] ".." ".." ".." ".." ...
 $ 2007 [YR2007]: chr [1:16795] ".." ".." ".." ".." ...
 $ 2008 [YR2008]: chr [1:16795] "7.1" "13.3" "24.8" "36.7" ...
 $ 2009 [YR2009]: chr [1:16795] ".." ".." ".." ".." ...
 $ 2010 [YR2010]: chr [1:16795] ".." ".." ".." ".." ...
 $ 2011 [YR2011]: chr [1:16795] ".." ".." ".." ".." ...
 $ 2012 [YR2012]: chr [1:16795] ".." ".." ".." ".." ...
 $ 2013 [YR2013]: chr [1:16795] ".." ".." ".." ".." ...
 $ 2014 [YR2014]: chr [1:16795] "9.5" "6.9" "34.6" "46.8" ...
 $ 2015 [YR2015]: chr [1:16795] ".." ".." ".." ".." ...
 $ 2016 [YR2016]: chr [1:16795] ".." ".." ".." ".." ...
 $ 2017 [YR2017]: chr [1:16795] ".." ".." ".." ".." ...
 $ 2018 [YR2018]: chr [1:16795] ".." ".." ".." ".." ...
 $ 2019 [YR2019]: chr [1:16795] ".." ".." ".." ".." ...
 $ 2020 [YR2020]: chr [1:16795] ".." ".." ".." ".." ...

#controlling data

head(df)

#changing column names

colnames(df)[1] <- "country"
colnames(df)[2] <- "code"
colnames(df)[3] <- "serie"
colnames(df)[4] <- "seirsCode"

#turing some columns into rows

library(reshape2)
df <- melt(df,id.vars=c('country','code','serie'), measure.vars =c(colnames(df)[5:length(colnames(df))]))

#checking the data

head(df)

#changing column name

colnames(df)[4] <- "year"

#eliminating data where value column is nan

df <- subset(df, subset = is.na(df$value) == F)

#checking the data

sum(is.na(df$value))
[1] 0

#checking the data by looking number of countries

count(df, "code")

#checking the data

head(df, 20)

#checking the data

tail(df)

#using a function to give nan values to “..”

fillP <- function(x){
  if (x=="..") {
    return(NA)
  } 
  else {
    return(as.numeric(x))
  }
  }
df$value <- sapply(df$value, fillP)

#checking the data

str(df)
'data.frame':   251850 obs. of  5 variables:
 $ country: chr  "Afghanistan" "Afghanistan" "Afghanistan" "Afghanistan" ...
 $ code   : chr  "AFG" "AFG" "AFG" "AFG" ...
 $ serie  : chr  "Age of the establishment (years)" "Annual employment growth (%)" "Bribery depth (% of public transactions where a gift or informal payment was requested)" "Bribery incidence (percent of firms experiencing at least one bribe payment request)" ...
 $ year   : Factor w/ 15 levels "2006 [YR2006]",..: 1 1 1 1 1 1 1 1 1 1 ...
 $ value  : num  NA NA NA NA NA NA NA NA NA NA ...

#checking the data

sum(is.na(df$country))
[1] 0
sum(is.na(df$code))
[1] 0
sum(is.na(df$year))
[1] 0
sum(is.na(df$value) == F)
[1] 32362

#pivoting the data

df_p <- dcast(df, country + code + year ~ serie, value.var="value", fun.aggregate=sum)

#checking the data

head(df_p)
tail(df_p)

#creating a new dataframe by taking ‘Percent of firms that spend on R&D’ as reference

df_m <- subset(df_p, subset = is.na(df_p['Percent of firms that spend on R&D'])==FALSE)
head(df_m)

#number of unique countries

length(unique(df_m$country))
[1] 128

#elimination those countries having more than one oberservation

for (i in unique(df_m$country)){
  d <- subset(df_m, subset = df_m$country == i)
  if (nrow(d) > 1){
    d <- tail(d, 1)
    df_m <- subset(df_m, subset = df_m$country != i)
    df_m <- rbind(df_m, d) 
  }
}

df_m
tail(df_m, 10)
subset(df_m, subset = df_m$country == "Albania")

#dropping a column bcz of missing values

df_m['Percent of firms using e-mail to interact with clients/suppliers'] <- NULL

#the number rows in case dropping all nan values

nrow(na.omit(df_m))
[1] 73

#corr table for ‘Percent of firms that spend on R&D’

nums <- unlist(lapply(df_m, is.numeric)) 
mydata.rcorr <- rcorr(as.matrix(df_m[,nums]))
c <- as.data.frame(mydata.rcorr$r)
c[order(c["Percent of firms that spend on R&D"]),]["Percent of firms that spend on R&D"]
NA
NA

#variable list for the model based on corr table

model.list <- c("country","code",
                "Percent of firms with at least 10% of foreign ownership",
                "Percent of firms with an internationally-recognized quality certification",
                "Percent of firms with female participation in ownership",
                "Percent of firms having their own Web site",
                "Percent of firms offering formal training",
                "Proportion of investment financed by banks (%)",
                "Proportion of private domestic ownership in a firm (%)",
                "Percent of firms expected to give gifts in meetings with tax officials",
                "Percent of firms competing against unregistered or informal firms",
                "Percent of firms that spend on R&D")

#model dataframe

df_m <- df_m[model.list]
colnames(df_m)[3:length(colnames(df_m))] <- c("foreign","certi","fOwn","web","train", "bank","dOwn","tax","informal","rd")
head(df_m)

#checking the data

summary(df_m)
   country              code              foreign          certi            fOwn            web            train            bank       
 Length:128         Length:128         Min.   : 0.00   Min.   : 0.10   Min.   : 2.20   Min.   : 1.80   Min.   : 1.90   Min.   : 0.000  
 Class :character   Class :character   1st Qu.: 4.60   1st Qu.: 6.30   1st Qu.:23.73   1st Qu.:30.18   1st Qu.:20.80   1st Qu.: 7.075  
 Mode  :character   Mode  :character   Median : 9.00   Median :11.60   Median :33.35   Median :45.60   Median :31.35   Median :13.500  
                                       Mean   :11.28   Mean   :14.88   Mean   :34.39   Mean   :47.33   Mean   :33.33   Mean   :14.791  
                                       3rd Qu.:13.82   3rd Qu.:22.10   3rd Qu.:44.05   3rd Qu.:64.62   3rd Qu.:43.38   3rd Qu.:20.550  
                                       Max.   :73.30   Max.   :57.90   Max.   :76.00   Max.   :91.70   Max.   :79.20   Max.   :41.700  
      dOwn             tax           informal           rd        
 Min.   : 35.20   Min.   : 0.00   Min.   : 7.20   Min.   : 1.000  
 1st Qu.: 84.62   1st Qu.: 3.35   1st Qu.:35.92   1st Qu.: 7.375  
 Median : 90.65   Median : 7.65   Median :53.45   Median :11.750  
 Mean   : 87.51   Mean   :11.90   Mean   :51.53   Mean   :14.911  
 3rd Qu.: 95.30   3rd Qu.:16.88   3rd Qu.:68.90   3rd Qu.:21.050  
 Max.   :100.00   Max.   :62.60   Max.   :95.20   Max.   :46.100  

#checking the data

str(df_m)
'data.frame':   128 obs. of  12 variables:
 $ country : chr  "Afghanistan" "Antigua and Barbuda" "Bahamas, The" "Bangladesh" ...
 $ code    : chr  "AFG" "ATG" "BHS" "BGD" ...
 $ foreign : num  0 8.5 14.9 1.9 11.8 14.2 4.6 19.4 3.7 20.6 ...
 $ certi   : num  22.1 3.7 31.1 14.3 18.3 25.1 0.7 14.4 3.7 10.6 ...
 $ fOwn    : num  2.2 21.3 58.3 12.7 43.5 44.2 30.4 36.7 43.3 44 ...
 $ web     : num  21.8 26.3 50.1 26.3 68.2 91.2 27.7 38.3 31.3 25.6 ...
 $ train   : num  31.7 24.9 37.1 21.9 35.5 57.8 14.4 20 26 32 ...
 $ bank    : num  1.5 32.3 11.7 12.4 12.8 34.6 18.1 3.6 18.9 14.8 ...
 $ dOwn    : num  100 92.2 85.3 98.4 88.4 84.9 96.9 77.5 95.5 83.3 ...
 $ tax     : num  34 6.1 10.9 41 1.6 0.2 6.7 11 0.3 20 ...
 $ informal: num  41.5 78.1 67.5 39.4 49.4 19.2 58.5 66.8 7.2 51.3 ...
 $ rd      : num  20.9 22.9 28.4 17.3 33.7 28.9 4.8 14 7.3 30.9 ...

#historgrams for each variables


for (i in colnames(df_m)[3:length(colnames(df_m))]){
  hist(df_m[,i], main = i, xlab = i)

  
}

#scatter plot for each variables


for (i in colnames(df_m)[3:length(colnames(df_m))]){
  scatter.smooth(df_m[,i], df_m[,"rd"], main = i, xlab = i)

  
}

#dealing with outliers

subset(df_m, subset = df_m$informal > 80)

#dealing with outliers

subset(df_m, subset = df_m$rd > 45)

#dealing with outliers

df_m <- subset(df_m, subset = df_m$rd < 45)
df_m <- subset(df_m, subset = df_m$foreign < 60)
df_m <- subset(df_m, subset = df_m$certi < 50)
df_m <- subset(df_m, subset = df_m$dOwn > 50)
df_m <- subset(df_m, subset = df_m$tax < 50)
df_m <- subset(df_m, subset = df_m$informal < 85)
head(df_m)

#scatter plot for each variables

for (i in colnames(df_m)[3:length(colnames(df_m))]){
  scatter.smooth(df_m[,i], df_m[,"rd"], main = i, xlab = i)
}

#showing there is no any missing values

nrow(na.omit(df_m))
[1] 116
nrow((df_m))
[1] 116

#corr matrix

library(corrgram)

Attaching package: ‘corrgram’

The following object is masked from ‘package:lattice’:

    panel.fill

The following object is masked from ‘package:plyr’:

    baseball
corrgram(df_m,order=TRUE, lower.panel=panel.shade,
  upper.panel=panel.pie, text.panel=panel.txt)

#corr matrix

nums <- unlist(lapply(df_m, is.numeric)) 
mydata.rcorr <- rcorr(as.matrix(df_m[,nums]))
c <- as.data.frame(mydata.rcorr$r)
c

#there are high correaltion between foreing & dOwn, certi & web.

#simple regression for each variables with Percent of firms that spend on R&D

for (i in colnames(df_m)[3:(length(colnames(df_m))-1)]){
  model <- lm(rd ~ ., df_m[c("rd",i)])
  print(summary(model))
}

Call:
lm(formula = rd ~ ., data = df_m[c("rd", i)])

Residuals:
    Min      1Q  Median      3Q     Max 
-14.080  -6.148  -2.071   6.171  28.559 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept) 10.58188    1.26068   8.394 1.46e-13 ***
foreign      0.29987    0.09023   3.323   0.0012 ** 
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 8.671 on 114 degrees of freedom
Multiple R-squared:  0.08832,   Adjusted R-squared:  0.08033 
F-statistic: 11.04 on 1 and 114 DF,  p-value: 0.001196


Call:
lm(formula = rd ~ ., data = df_m[c("rd", i)])

Residuals:
     Min       1Q   Median       3Q      Max 
-14.4564  -5.7194  -0.7811   5.8237  29.9088 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept) 10.10358    1.38998   7.269 4.87e-11 ***
certi        0.26357    0.08061   3.270  0.00142 ** 
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 8.683 on 114 degrees of freedom
Multiple R-squared:  0.08575,   Adjusted R-squared:  0.07773 
F-statistic: 10.69 on 1 and 114 DF,  p-value: 0.001423


Call:
lm(formula = rd ~ ., data = df_m[c("rd", i)])

Residuals:
    Min      1Q  Median      3Q     Max 
-18.634  -5.414  -1.439   5.942  27.115 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)  6.93873    1.93302   3.590 0.000490 ***
fOwn         0.19869    0.05102   3.895 0.000166 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 8.531 on 114 degrees of freedom
Multiple R-squared:  0.1174,    Adjusted R-squared:  0.1097 
F-statistic: 15.17 on 1 and 114 DF,  p-value: 0.0001662


Call:
lm(formula = rd ~ ., data = df_m[c("rd", i)])

Residuals:
    Min      1Q  Median      3Q     Max 
-13.841  -6.006  -1.731   5.881  29.098 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)  9.71681    2.01071   4.833 4.24e-06 ***
web          0.08591    0.03852   2.230   0.0277 *  
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 8.889 on 114 degrees of freedom
Multiple R-squared:  0.04181,   Adjusted R-squared:  0.0334 
F-statistic: 4.974 on 1 and 114 DF,  p-value: 0.02769


Call:
lm(formula = rd ~ ., data = df_m[c("rd", i)])

Residuals:
     Min       1Q   Median       3Q      Max 
-15.8087  -4.6459  -0.8223   3.2174  30.8266 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)  3.02634    1.67769   1.804   0.0739 .  
train        0.32862    0.04644   7.076 1.28e-10 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 7.569 on 114 degrees of freedom
Multiple R-squared:  0.3052,    Adjusted R-squared:  0.2991 
F-statistic: 50.07 on 1 and 114 DF,  p-value: 1.285e-10


Call:
lm(formula = rd ~ ., data = df_m[c("rd", i)])

Residuals:
    Min      1Q  Median      3Q     Max 
-16.206  -6.378  -1.384   4.904  27.084 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)  9.33866    1.53355   6.090 1.57e-08 ***
bank         0.29417    0.08603   3.419 0.000872 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 8.648 on 114 degrees of freedom
Multiple R-squared:  0.09302,   Adjusted R-squared:  0.08506 
F-statistic: 11.69 on 1 and 114 DF,  p-value: 0.0008719


Call:
lm(formula = rd ~ ., data = df_m[c("rd", i)])

Residuals:
    Min      1Q  Median      3Q     Max 
-17.029  -5.886  -2.137   6.643  24.663 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept) 36.99278    7.53144   4.912 3.04e-06 ***
dOwn        -0.26221    0.08468  -3.097  0.00246 ** 
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 8.721 on 114 degrees of freedom
Multiple R-squared:  0.07759,   Adjusted R-squared:  0.0695 
F-statistic: 9.589 on 1 and 114 DF,  p-value: 0.002464


Call:
lm(formula = rd ~ ., data = df_m[c("rd", i)])

Residuals:
    Min      1Q  Median      3Q     Max 
-13.281  -7.030  -1.926   6.322  27.881 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept) 14.78764    1.22174  12.104   <2e-16 ***
tax         -0.08841    0.08001  -1.105    0.272    
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 9.033 on 114 degrees of freedom
Multiple R-squared:  0.0106,    Adjusted R-squared:  0.001916 
F-statistic: 1.221 on 1 and 114 DF,  p-value: 0.2715


Call:
lm(formula = rd ~ ., data = df_m[c("rd", i)])

Residuals:
    Min      1Q  Median      3Q     Max 
-14.382  -6.919  -1.543   5.689  29.107 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)  9.47767    2.20118   4.306 3.54e-05 ***
informal     0.08670    0.04086   2.122    0.036 *  
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 8.907 on 114 degrees of freedom
Multiple R-squared:  0.03799,   Adjusted R-squared:  0.02955 
F-statistic: 4.502 on 1 and 114 DF,  p-value: 0.03602

#lineral model with all variables

model <- lm(rd ~ ., subset(df_m, select = -c(country, code)))
summary(model)

Call:
lm(formula = rd ~ ., data = subset(df_m, select = -c(country, 
    code)))

Residuals:
     Min       1Q   Median       3Q      Max 
-13.6912  -3.5780  -0.8568   4.2476  26.7149 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept) 19.49882   12.53097   1.556  0.12268    
foreign     -0.05806    0.13656  -0.425  0.67160    
certi        0.26084    0.08445   3.089  0.00257 ** 
fOwn         0.06659    0.04905   1.358  0.17745    
web         -0.07355    0.04718  -1.559  0.12201    
train        0.25721    0.05917   4.347 3.17e-05 ***
bank         0.12738    0.08268   1.541  0.12636    
dOwn        -0.23035    0.12506  -1.842  0.06828 .  
tax          0.04503    0.07416   0.607  0.54497    
informal     0.03933    0.03613   1.089  0.27882    
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 7.107 on 106 degrees of freedom
Multiple R-squared:  0.4305,    Adjusted R-squared:  0.3821 
F-statistic: 8.902 on 9 and 106 DF,  p-value: 6.785e-10

#linear model by dropping web dOwn variables based on corr table

model <- lm(rd ~ ., subset(df_m, select = -c(country, code, web, dOwn)))
summary(model)

Call:
lm(formula = rd ~ ., data = subset(df_m, select = -c(country, 
    code, web, dOwn)))

Residuals:
    Min      1Q  Median      3Q     Max 
-13.483  -4.385  -1.195   3.647  30.498 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept) -5.23029    2.87158  -1.821 0.071315 .  
foreign      0.16306    0.07945   2.052 0.042563 *  
certi        0.18654    0.07380   2.528 0.012931 *  
fOwn         0.05940    0.04978   1.193 0.235397    
train        0.21813    0.05531   3.944 0.000143 ***
bank         0.11338    0.08363   1.356 0.177987    
tax          0.06932    0.07344   0.944 0.347331    
informal     0.05935    0.03525   1.684 0.095151 .  
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 7.227 on 108 degrees of freedom
Multiple R-squared:  0.3999,    Adjusted R-squared:  0.361 
F-statistic: 10.28 on 7 and 108 DF,  p-value: 7.966e-10

#histogram for residuals


res <- residuals(model)


res <- as.data.frame(res)


ggplot(res,aes(res)) +  geom_histogram(fill='blue',alpha=0.5)

plots for residuals

plot(model)

#splitting data into train test datas

#
library(caTools)

set.seed(101) 


sample <- sample.split(df_m$rd, SplitRatio = 0.85) 

train = subset(subset(df_m, select = -c(country, code, web, dOwn)), sample == TRUE)

test = subset(subset(df_m, select =-c(country, code, web, dOwn)), sample == FALSE)

#training linear model

model <- lm(rd ~ .,train)
r.predictions <- predict(model,test)

#predicting besed on test datas

results <- cbind(r.predictions,test$rd) 
colnames(results) <- c('pred','real')
results <- as.data.frame(results)

#dealing with zero values

to_zero <- function(x){
    if  (x < 0){
        return(0)
    }else{
        return(x)
    }
}
results$pred <- sapply(results$pred,to_zero)

#calculatinng mean squarted error

mse <- mean((results$real-results$pred)^2)
print(mse)
[1] 47.05415

#calculatinng coefficient of determination

SSE = sum((results$pred - results$real)^2)
SST = sum( (mean(df_m$rd) - results$real)^2)

R2 = 1 - SSE/SST
R2
[1] 0.4271447

#appling random forest model

library(randomForest)
randomForest 4.6-14
Type rfNews() to see new features/changes/bug fixes.

Attaching package: ‘randomForest’

The following object is masked from ‘package:ggplot2’:

    margin

The following object is masked from ‘package:dplyr’:

    combine
model <- randomForest(rd ~ .,train,mportance = TRUE, na.action = na.omit) ##training  model
model

Call:
 randomForest(formula = rd ~ ., data = train, mportance = TRUE,      na.action = na.omit) 
               Type of random forest: regression
                     Number of trees: 500
No. of variables tried at each split: 2

          Mean of squared residuals: 57.57264
                    % Var explained: 28.49
r.predictions <- predict(model,test)  #predicting besed on test datas
results <- cbind(r.predictions,test$rd) 
colnames(results) <- c('pred','real')
results <- as.data.frame(results)
to_zero <- function(x){   #dealing with zero values
    if  (x < 0){
        return(0)
    }else{
        return(x)
    }
}
results$pred <- sapply(results$pred,to_zero)
mse <- mean((results$real-results$pred)^2)    #calculatinng mean squarted error
print(mse)
[1] 53.51444
SSE = sum((results$pred - results$real)^2)   #calculatinng coefficient of determination
SST = sum( (mean(df_m$rd) - results$real)^2)

R2 = 1 - SSE/SST
R2
[1] 0.3484946

#showing which variable is important based on random forest model.

importance(model)
         IncNodePurity
foreign      1352.6656
certi         942.5003
fOwn         1284.3216
train        1668.3132
bank          723.7881
tax           508.6516
informal      772.4876

#clustring based on explanatory variables (5 cluster)

km_5 <- kmeans( subset(df_m, select = -c(country, code, web, dOwn)), 5,nstart = 10)

#clustring based on explanatory variables (7 cluster)

km_7 <- kmeans( subset(df_m, select = -c(country, code, web, dOwn)), 7, nstart = 10)

#adding predicted cluster to dataframe

df_m["k5"] <- km_5$cluster
df_m["k7"] <- km_7$cluster

#visualization of clusters

library(cluster) 
clusplot(df_m, df_m$k5, color=TRUE, shade=TRUE, labels=0,lines=0, )

clusplot(df_m, df_m$k7, color=TRUE, shade=TRUE, labels=0,lines=0, )

library(tidyr)

Attaching package: ‘tidyr’

The following object is masked from ‘package:reshape2’:

    smiths
library(grid) 
library(rworldmap)
Loading required package: sp
### Welcome to rworldmap ###
For a short introduction type :      vignette('rworldmap')
library(mapproj)
Loading required package: maps

Attaching package: ‘maps’

The following object is masked from ‘package:cluster’:

    votes.repub

The following object is masked from ‘package:plyr’:

    ozone
df_m
#join(df_m, as.data.frame(worldMap$NAME),type ='right', by = worldMap$NAME )

#world map based on Percent of firms that spend on R&D variable

worldMap<-getMap() # worldmap laden

mf <- merge(df_m, as.data.frame(worldMap$ISO_A3),  by.x = 'code', by.y = "worldMap$ISO_A3", sort = TRUE,all.y=TRUE )
m <-  which(worldMap$ISO_A3%in%mf$code)

Coords <- lapply(m, function(i){
  f <- data.frame(worldMap@polygons[[i]]@Polygons[[1]]@coords)
  f$region =as.character(worldMap$ISO_A3[i])
  colnames(f) <- list("long", "lat", "region")
  return(f)
})

Coords <- do.call("rbind", Coords)

tw <- data.frame(country = mf$code, value = mf$rd)
Coords$value2014 <- tw$value[match(Coords$region,tw$country)]

mp <- ggplot() + geom_polygon(data = Coords, aes(x = long, y = lat, group = region, fill = value2014),
                             colour = "black", size = 0.1) 
  #coord_map(xlim = c(-13, 35),  ylim = c(32, 71))
            

mp <- mp + scale_fill_gradient2(name = "R&D", low = "coral", mid="white", high = "blue", midpoint=20, space="Lab", na.value = "grey50")


mp <- mp + theme(#panel.grid.minor = element_line(colour = NA), panel.grid.minor = element_line(colour = NA),
               #panel.background = element_rect(fill = NA, colour = NA),
               axis.text.x = element_blank(),
               axis.text.y = element_blank(), axis.ticks.x = element_blank(),
               axis.ticks.y = element_blank(), axis.title = element_blank(),
               #rect = element_blank(),
               plot.margin = unit(0 * c(-1.5, -1.5, -1.5, -1.5), "lines"))
mp

#world map based on clusters

worldMap<-getMap() # worldmap laden

mf <- merge(df_m, as.data.frame(worldMap$ISO_A3),  by.x = 'code', by.y = "worldMap$ISO_A3", sort = TRUE,all.y=TRUE )
m <-  which(worldMap$ISO_A3%in%mf$code)

Coords <- lapply(m, function(i){
  f <- data.frame(worldMap@polygons[[i]]@Polygons[[1]]@coords)
  f$region =as.character(worldMap$ISO_A3[i])
  colnames(f) <- list("long", "lat", "region")
  return(f)
})

Coords <- do.call("rbind", Coords)

tw <- data.frame(country = mf$code, value = mf$k5)
Coords$value <- tw$value[match(Coords$region,tw$country)]

mp <- ggplot() + geom_polygon(data = Coords, aes(x = long, y = lat, group = region, fill = value),
                             colour = "black", size = 0.1) 
  #coord_map(xlim = c(-13, 35),  ylim = c(32, 71))
            

mp <- mp + scale_fill_gradient2(name = "Clusters", low = "red", mid="white", high = "blue", space="Lab", na.value = "grey50", midpoint = 3)


mp <- mp + theme(#panel.grid.minor = element_line(colour = NA), panel.grid.minor = element_line(colour = NA),
               #panel.background = element_rect(fill = NA, colour = NA),
               axis.text.x = element_blank(),
               axis.text.y = element_blank(), axis.ticks.x = element_blank(),
               axis.ticks.y = element_blank(), axis.title = element_blank(),
               #rect = element_blank(),
               #plot.margin = unit(0 * c(-1.5, -1.5, -1.5, -1.5), "lines")
               )
mp

#making a new dataframe for neural network

df_m <- na.omit(df_m)
df_n <- subset(df_m, select = -c(country, code, k5,k7, dOwn, web))

#making formula based on column names for the model

n <- names(df_n)
 as.formula(paste("rd ~", paste(n[!n %in% "rd"], collapse = " + ")))
rd ~ foreign + certi + fOwn + train + bank + tax + informal

#finding max and mins for each cloumn

maxs <- apply(df_n, 2, max) 
mins <- apply(df_n, 2, min)

#scaling columns

scaled <- as.data.frame(scale(df_n, center = mins, scale = maxs - mins))

#splitting dataset

split = sample.split(scaled$rd, SplitRatio = 0.90)

train = subset(scaled, split == TRUE)
test = subset(scaled, split == FALSE)

#training model

library(neuralnet)
n <- names(train)
f <- as.formula(paste("rd ~", paste(n[!n %in% "rd"], collapse = " + ")))
model <- neuralnet(f,data=train,hidden=c(5,3),linear.output=TRUE)

#predicting and plotting predicted and real values

predicted.nn.values <- compute(model, test[1:(length(test)-1)])
true.predictions <- predicted.nn.values$net.result*(max(df_n$rd)-min(df_n$rd))+min(df_n$rd)
test.r <- (test$rd)*(max(df_n$rd)-min(df_n$rd))+min(df_n$rd)
error.df <- data.frame(test.r,true.predictions)
ggplot(error.df,aes(x=test.r,y=true.predictions)) + geom_point() + stat_smooth(method = "lm")

LS0tCnRpdGxlOiAiVWRlbXlfUHJvamVjdCIKb3V0cHV0OiBodG1sX25vdGVib29rCi0tLQoKCmBgYHtyfQpsaWJyYXJ5KHJlYWR4bCkKbGlicmFyeShkcGx5cikKbGlicmFyeShwbHlyKQpsaWJyYXJ5KEhtaXNjKQpgYGAKCiNyZWFkaW5nIGRhdGEKYGBge3J9CmRmIDwtIHJlYWRfZXhjZWwoJ0RhdGFfRXh0cmFjdF9Gcm9tX0VudGVycHJpc2VfU3VydmV5cy54bHN4JykKYGBgCgojY29udHJvbGxpbmcgZGF0YQpgYGB7cn0Kc3RyKGRmKQpgYGAKI2NvbnRyb2xsaW5nIGRhdGEKYGBge3J9CmhlYWQoZGYpCmBgYAojY2hhbmdpbmcgY29sdW1uIG5hbWVzCmBgYHtyfQpjb2xuYW1lcyhkZilbMV0gPC0gImNvdW50cnkiCmNvbG5hbWVzKGRmKVsyXSA8LSAiY29kZSIKY29sbmFtZXMoZGYpWzNdIDwtICJzZXJpZSIKY29sbmFtZXMoZGYpWzRdIDwtICJzZWlyc0NvZGUiCmBgYAoKCgoKI3R1cmluZyBzb21lIGNvbHVtbnMgaW50byByb3dzCmBgYHtyfQpsaWJyYXJ5KHJlc2hhcGUyKQpkZiA8LSBtZWx0KGRmLGlkLnZhcnM9YygnY291bnRyeScsJ2NvZGUnLCdzZXJpZScpLCBtZWFzdXJlLnZhcnMgPWMoY29sbmFtZXMoZGYpWzU6bGVuZ3RoKGNvbG5hbWVzKGRmKSldKSkKCmBgYAojY2hlY2tpbmcgdGhlIGRhdGEKYGBge3J9CmhlYWQoZGYpCmBgYAoKCiNjaGFuZ2luZyBjb2x1bW4gbmFtZQpgYGB7cn0KY29sbmFtZXMoZGYpWzRdIDwtICJ5ZWFyIgpgYGAKCgojZWxpbWluYXRpbmcgZGF0YSB3aGVyZSB2YWx1ZSBjb2x1bW4gaXMgbmFuCmBgYHtyfQpkZiA8LSBzdWJzZXQoZGYsIHN1YnNldCA9IGlzLm5hKGRmJHZhbHVlKSA9PSBGKQpgYGAKI2NoZWNraW5nIHRoZSBkYXRhCmBgYHtyfQpzdW0oaXMubmEoZGYkdmFsdWUpKQpgYGAKCiNjaGVja2luZyB0aGUgZGF0YSBieSBsb29raW5nIG51bWJlciBvZiBjb3VudHJpZXMKYGBge3J9CmNvdW50KGRmLCAiY29kZSIpCmBgYAojY2hlY2tpbmcgdGhlIGRhdGEKYGBge3J9CmhlYWQoZGYsIDIwKQpgYGAKI2NoZWNraW5nIHRoZSBkYXRhCmBgYHtyfQp0YWlsKGRmKQpgYGAKI3VzaW5nIGEgZnVuY3Rpb24gdG8gZ2l2ZSBuYW4gdmFsdWVzIHRvICIuLiIKYGBge3J9CmZpbGxQIDwtIGZ1bmN0aW9uKHgpewogIGlmICh4PT0iLi4iKSB7CiAgICByZXR1cm4oTkEpCiAgfSAKICBlbHNlIHsKICAgIHJldHVybihhcy5udW1lcmljKHgpKQogIH0KICB9CmRmJHZhbHVlIDwtIHNhcHBseShkZiR2YWx1ZSwgZmlsbFApCmBgYAoKI2NoZWNraW5nIHRoZSBkYXRhCmBgYHtyfQpzdHIoZGYpCmBgYAoKI2NoZWNraW5nIHRoZSBkYXRhCmBgYHtyfQpzdW0oaXMubmEoZGYkY291bnRyeSkpCnN1bShpcy5uYShkZiRjb2RlKSkKc3VtKGlzLm5hKGRmJHllYXIpKQpzdW0oaXMubmEoZGYkdmFsdWUpID09IEYpCmBgYAoKI3Bpdm90aW5nIHRoZSBkYXRhCmBgYHtyfQpkZl9wIDwtIGRjYXN0KGRmLCBjb3VudHJ5ICsgY29kZSArIHllYXIgfiBzZXJpZSwgdmFsdWUudmFyPSJ2YWx1ZSIsIGZ1bi5hZ2dyZWdhdGU9c3VtKQpgYGAKI2NoZWNraW5nIHRoZSBkYXRhCmBgYHtyfQpoZWFkKGRmX3ApCnRhaWwoZGZfcCkKYGBgCgoKCiNjcmVhdGluZyBhIG5ldyBkYXRhZnJhbWUgYnkgdGFraW5nICdQZXJjZW50IG9mIGZpcm1zIHRoYXQgc3BlbmQgb24gUiZEJyBhcyByZWZlcmVuY2UKYGBge3J9CmRmX20gPC0gc3Vic2V0KGRmX3AsIHN1YnNldCA9IGlzLm5hKGRmX3BbJ1BlcmNlbnQgb2YgZmlybXMgdGhhdCBzcGVuZCBvbiBSJkQnXSk9PUZBTFNFKQpoZWFkKGRmX20pCmBgYAoKI251bWJlciBvZiB1bmlxdWUgY291bnRyaWVzCmBgYHtyfQpsZW5ndGgodW5pcXVlKGRmX20kY291bnRyeSkpCmBgYAojZWxpbWluYXRpb24gdGhvc2UgY291bnRyaWVzIGhhdmluZyBtb3JlIHRoYW4gb25lIG9iZXJzZXJ2YXRpb24gCmBgYHtyfQpmb3IgKGkgaW4gdW5pcXVlKGRmX20kY291bnRyeSkpewogIGQgPC0gc3Vic2V0KGRmX20sIHN1YnNldCA9IGRmX20kY291bnRyeSA9PSBpKQogIGlmIChucm93KGQpID4gMSl7CiAgICBkIDwtIHRhaWwoZCwgMSkKICAgIGRmX20gPC0gc3Vic2V0KGRmX20sIHN1YnNldCA9IGRmX20kY291bnRyeSAhPSBpKQogICAgZGZfbSA8LSByYmluZChkZl9tLCBkKSAKICB9Cn0KCmRmX20KYGBgCgpgYGB7cn0KdGFpbChkZl9tLCAxMCkKYGBgCmBgYHtyfQpzdWJzZXQoZGZfbSwgc3Vic2V0ID0gZGZfbSRjb3VudHJ5ID09ICJBbGJhbmlhIikKYGBgCiNkcm9wcGluZyBhIGNvbHVtbiBiY3ogb2YgbWlzc2luZyB2YWx1ZXMKYGBge3J9CmRmX21bJ1BlcmNlbnQgb2YgZmlybXMgdXNpbmcgZS1tYWlsIHRvIGludGVyYWN0IHdpdGggY2xpZW50cy9zdXBwbGllcnMnXSA8LSBOVUxMCmBgYAoKI3RoZSBudW1iZXIgcm93cyBpbiBjYXNlIGRyb3BwaW5nIGFsbCBuYW4gdmFsdWVzCmBgYHtyfQpucm93KG5hLm9taXQoZGZfbSkpCgpgYGAKCiNjb3JyIHRhYmxlIGZvciAnUGVyY2VudCBvZiBmaXJtcyB0aGF0IHNwZW5kIG9uIFImRCcgCmBgYHtyfQpudW1zIDwtIHVubGlzdChsYXBwbHkoZGZfbSwgaXMubnVtZXJpYykpIApteWRhdGEucmNvcnIgPC0gcmNvcnIoYXMubWF0cml4KGRmX21bLG51bXNdKSkKYyA8LSBhcy5kYXRhLmZyYW1lKG15ZGF0YS5yY29yciRyKQpjW29yZGVyKGNbIlBlcmNlbnQgb2YgZmlybXMgdGhhdCBzcGVuZCBvbiBSJkQiXSksXVsiUGVyY2VudCBvZiBmaXJtcyB0aGF0IHNwZW5kIG9uIFImRCJdCgoKYGBgCgojdmFyaWFibGUgbGlzdCBmb3IgdGhlIG1vZGVsIGJhc2VkIG9uIGNvcnIgdGFibGUKYGBge3J9Cm1vZGVsLmxpc3QgPC0gYygiY291bnRyeSIsImNvZGUiLAogICAgICAgICAgICAgICAgIlBlcmNlbnQgb2YgZmlybXMgd2l0aCBhdCBsZWFzdCAxMCUgb2YgZm9yZWlnbiBvd25lcnNoaXAiLAogICAgICAgICAgICAgICAgIlBlcmNlbnQgb2YgZmlybXMgd2l0aCBhbiBpbnRlcm5hdGlvbmFsbHktcmVjb2duaXplZCBxdWFsaXR5IGNlcnRpZmljYXRpb24iLAogICAgICAgICAgICAgICAgIlBlcmNlbnQgb2YgZmlybXMgd2l0aCBmZW1hbGUgcGFydGljaXBhdGlvbiBpbiBvd25lcnNoaXAiLAogICAgICAgICAgICAgICAgIlBlcmNlbnQgb2YgZmlybXMgaGF2aW5nIHRoZWlyIG93biBXZWIgc2l0ZSIsCiAgICAgICAgICAgICAgICAiUGVyY2VudCBvZiBmaXJtcyBvZmZlcmluZyBmb3JtYWwgdHJhaW5pbmciLAogICAgICAgICAgICAgICAgIlByb3BvcnRpb24gb2YgaW52ZXN0bWVudCBmaW5hbmNlZCBieSBiYW5rcyAoJSkiLAogICAgICAgICAgICAgICAgIlByb3BvcnRpb24gb2YgcHJpdmF0ZSBkb21lc3RpYyBvd25lcnNoaXAgaW4gYSBmaXJtICglKSIsCiAgICAgICAgICAgICAgICAiUGVyY2VudCBvZiBmaXJtcyBleHBlY3RlZCB0byBnaXZlIGdpZnRzIGluIG1lZXRpbmdzIHdpdGggdGF4IG9mZmljaWFscyIsCiAgICAgICAgICAgICAgICAiUGVyY2VudCBvZiBmaXJtcyBjb21wZXRpbmcgYWdhaW5zdCB1bnJlZ2lzdGVyZWQgb3IgaW5mb3JtYWwgZmlybXMiLAogICAgICAgICAgICAgICAgIlBlcmNlbnQgb2YgZmlybXMgdGhhdCBzcGVuZCBvbiBSJkQiKQpgYGAKCiNtb2RlbCBkYXRhZnJhbWUKYGBge3J9CmRmX20gPC0gZGZfbVttb2RlbC5saXN0XQpjb2xuYW1lcyhkZl9tKVszOmxlbmd0aChjb2xuYW1lcyhkZl9tKSldIDwtIGMoImZvcmVpZ24iLCJjZXJ0aSIsImZPd24iLCJ3ZWIiLCJ0cmFpbiIsICJiYW5rIiwiZE93biIsInRheCIsImluZm9ybWFsIiwicmQiKQpoZWFkKGRmX20pCmBgYAojY2hlY2tpbmcgdGhlIGRhdGEKYGBge3J9CnN1bW1hcnkoZGZfbSkKYGBgCiNjaGVja2luZyB0aGUgZGF0YQpgYGB7cn0Kc3RyKGRmX20pCmBgYAoKI2hpc3RvcmdyYW1zIGZvciBlYWNoIHZhcmlhYmxlcwpgYGB7cn0KCmZvciAoaSBpbiBjb2xuYW1lcyhkZl9tKVszOmxlbmd0aChjb2xuYW1lcyhkZl9tKSldKXsKICBoaXN0KGRmX21bLGldLCBtYWluID0gaSwgeGxhYiA9IGkpCgogIAp9CmBgYAojc2NhdHRlciBwbG90IGZvciBlYWNoIHZhcmlhYmxlcwpgYGB7cn0KCmZvciAoaSBpbiBjb2xuYW1lcyhkZl9tKVszOmxlbmd0aChjb2xuYW1lcyhkZl9tKSldKXsKICBzY2F0dGVyLnNtb290aChkZl9tWyxpXSwgZGZfbVssInJkIl0sIG1haW4gPSBpLCB4bGFiID0gaSkKCiAgCn0KYGBgCgojZGVhbGluZyB3aXRoIG91dGxpZXJzCmBgYHtyfQpzdWJzZXQoZGZfbSwgc3Vic2V0ID0gZGZfbSRpbmZvcm1hbCA+IDgwKQpgYGAKI2RlYWxpbmcgd2l0aCBvdXRsaWVycwpgYGB7cn0Kc3Vic2V0KGRmX20sIHN1YnNldCA9IGRmX20kcmQgPiA0NSkKYGBgCiNkZWFsaW5nIHdpdGggb3V0bGllcnMKYGBge3J9CmRmX20gPC0gc3Vic2V0KGRmX20sIHN1YnNldCA9IGRmX20kcmQgPCA0NSkKZGZfbSA8LSBzdWJzZXQoZGZfbSwgc3Vic2V0ID0gZGZfbSRmb3JlaWduIDwgNjApCmRmX20gPC0gc3Vic2V0KGRmX20sIHN1YnNldCA9IGRmX20kY2VydGkgPCA1MCkKZGZfbSA8LSBzdWJzZXQoZGZfbSwgc3Vic2V0ID0gZGZfbSRkT3duID4gNTApCmRmX20gPC0gc3Vic2V0KGRmX20sIHN1YnNldCA9IGRmX20kdGF4IDwgNTApCmRmX20gPC0gc3Vic2V0KGRmX20sIHN1YnNldCA9IGRmX20kaW5mb3JtYWwgPCA4NSkKYGBgCgpgYGB7cn0KaGVhZChkZl9tKQpgYGAKI3NjYXR0ZXIgcGxvdCBmb3IgZWFjaCB2YXJpYWJsZXMKYGBge3J9CmZvciAoaSBpbiBjb2xuYW1lcyhkZl9tKVszOmxlbmd0aChjb2xuYW1lcyhkZl9tKSldKXsKICBzY2F0dGVyLnNtb290aChkZl9tWyxpXSwgZGZfbVssInJkIl0sIG1haW4gPSBpLCB4bGFiID0gaSkKfQpgYGAKCgojc2hvd2luZyB0aGVyZSBpcyBubyBhbnkgbWlzc2luZyB2YWx1ZXMKYGBge3J9Cm5yb3cobmEub21pdChkZl9tKSkKbnJvdygoZGZfbSkpCmBgYAoKI2NvcnIgbWF0cml4CmBgYHtyfQpsaWJyYXJ5KGNvcnJncmFtKQpjb3JyZ3JhbShkZl9tLG9yZGVyPVRSVUUsIGxvd2VyLnBhbmVsPXBhbmVsLnNoYWRlLAogIHVwcGVyLnBhbmVsPXBhbmVsLnBpZSwgdGV4dC5wYW5lbD1wYW5lbC50eHQpCmBgYAojY29yciBtYXRyaXgKYGBge3J9Cm51bXMgPC0gdW5saXN0KGxhcHBseShkZl9tLCBpcy5udW1lcmljKSkgCm15ZGF0YS5yY29yciA8LSByY29ycihhcy5tYXRyaXgoZGZfbVssbnVtc10pKQpjIDwtIGFzLmRhdGEuZnJhbWUobXlkYXRhLnJjb3JyJHIpCmMKYGBgCiN0aGVyZSBhcmUgaGlnaCBjb3JyZWFsdGlvbiBiZXR3ZWVuIGZvcmVpbmcgJiBkT3duLCBjZXJ0aSAmIHdlYi4KCgoKCgoKI3NpbXBsZSByZWdyZXNzaW9uIGZvciBlYWNoIHZhcmlhYmxlcyB3aXRoIFBlcmNlbnQgb2YgZmlybXMgdGhhdCBzcGVuZCBvbiBSJkQKYGBge3J9CmZvciAoaSBpbiBjb2xuYW1lcyhkZl9tKVszOihsZW5ndGgoY29sbmFtZXMoZGZfbSkpLTEpXSl7CiAgbW9kZWwgPC0gbG0ocmQgfiAuLCBkZl9tW2MoInJkIixpKV0pCiAgcHJpbnQoc3VtbWFyeShtb2RlbCkpCn0KCmBgYAojbGluZXJhbCBtb2RlbCB3aXRoIGFsbCB2YXJpYWJsZXMKYGBge3J9Cm1vZGVsIDwtIGxtKHJkIH4gLiwgc3Vic2V0KGRmX20sIHNlbGVjdCA9IC1jKGNvdW50cnksIGNvZGUpKSkKc3VtbWFyeShtb2RlbCkKYGBgCiNsaW5lYXIgbW9kZWwgYnkgZHJvcHBpbmcgd2ViIGRPd24gdmFyaWFibGVzIGJhc2VkIG9uIGNvcnIgdGFibGUKYGBge3J9Cm1vZGVsIDwtIGxtKHJkIH4gLiwgc3Vic2V0KGRmX20sIHNlbGVjdCA9IC1jKGNvdW50cnksIGNvZGUsIHdlYiwgZE93bikpKQpzdW1tYXJ5KG1vZGVsKQpgYGAKI2hpc3RvZ3JhbSBmb3IgcmVzaWR1YWxzCmBgYHtyfQoKcmVzIDwtIHJlc2lkdWFscyhtb2RlbCkKCgpyZXMgPC0gYXMuZGF0YS5mcmFtZShyZXMpCgoKZ2dwbG90KHJlcyxhZXMocmVzKSkgKyAgZ2VvbV9oaXN0b2dyYW0oZmlsbD0nYmx1ZScsYWxwaGE9MC41KQpgYGAKcGxvdHMgZm9yIHJlc2lkdWFscwpgYGB7cn0KcGxvdChtb2RlbCkKYGBgCiNzcGxpdHRpbmcgZGF0YSBpbnRvIHRyYWluIHRlc3QgZGF0YXMKYGBge3J9CiMKbGlicmFyeShjYVRvb2xzKQoKc2V0LnNlZWQoMTAxKSAKCgpzYW1wbGUgPC0gc2FtcGxlLnNwbGl0KGRmX20kcmQsIFNwbGl0UmF0aW8gPSAwLjg1KSAKCnRyYWluID0gc3Vic2V0KHN1YnNldChkZl9tLCBzZWxlY3QgPSAtYyhjb3VudHJ5LCBjb2RlLCB3ZWIsIGRPd24pKSwgc2FtcGxlID09IFRSVUUpCgp0ZXN0ID0gc3Vic2V0KHN1YnNldChkZl9tLCBzZWxlY3QgPS1jKGNvdW50cnksIGNvZGUsIHdlYiwgZE93bikpLCBzYW1wbGUgPT0gRkFMU0UpCmBgYAojdHJhaW5pbmcgbGluZWFyIG1vZGVsCmBgYHtyfQptb2RlbCA8LSBsbShyZCB+IC4sdHJhaW4pCnIucHJlZGljdGlvbnMgPC0gcHJlZGljdChtb2RlbCx0ZXN0KQpgYGAKCiNwcmVkaWN0aW5nIGJlc2VkIG9uIHRlc3QgZGF0YXMKYGBge3J9CnJlc3VsdHMgPC0gY2JpbmQoci5wcmVkaWN0aW9ucyx0ZXN0JHJkKSAKY29sbmFtZXMocmVzdWx0cykgPC0gYygncHJlZCcsJ3JlYWwnKQpyZXN1bHRzIDwtIGFzLmRhdGEuZnJhbWUocmVzdWx0cykKYGBgCiNkZWFsaW5nIHdpdGggemVybyB2YWx1ZXMKYGBge3J9CnRvX3plcm8gPC0gZnVuY3Rpb24oeCl7CiAgICBpZiAgKHggPCAwKXsKICAgICAgICByZXR1cm4oMCkKICAgIH1lbHNlewogICAgICAgIHJldHVybih4KQogICAgfQp9CnJlc3VsdHMkcHJlZCA8LSBzYXBwbHkocmVzdWx0cyRwcmVkLHRvX3plcm8pCgpgYGAKCiNjYWxjdWxhdGlubmcgbWVhbiBzcXVhcnRlZCBlcnJvcgpgYGB7cn0KbXNlIDwtIG1lYW4oKHJlc3VsdHMkcmVhbC1yZXN1bHRzJHByZWQpXjIpCnByaW50KG1zZSkKYGBgCiNjYWxjdWxhdGlubmcgY29lZmZpY2llbnQgb2YgZGV0ZXJtaW5hdGlvbgpgYGB7cn0KU1NFID0gc3VtKChyZXN1bHRzJHByZWQgLSByZXN1bHRzJHJlYWwpXjIpClNTVCA9IHN1bSggKG1lYW4oZGZfbSRyZCkgLSByZXN1bHRzJHJlYWwpXjIpCgpSMiA9IDEgLSBTU0UvU1NUClIyCmBgYAojYXBwbGluZyByYW5kb20gZm9yZXN0IG1vZGVsCmBgYHtyfQpsaWJyYXJ5KHJhbmRvbUZvcmVzdCkKbW9kZWwgPC0gcmFuZG9tRm9yZXN0KHJkIH4gLix0cmFpbixtcG9ydGFuY2UgPSBUUlVFLCBuYS5hY3Rpb24gPSBuYS5vbWl0KSAjI3RyYWluaW5nICBtb2RlbAptb2RlbApyLnByZWRpY3Rpb25zIDwtIHByZWRpY3QobW9kZWwsdGVzdCkgICNwcmVkaWN0aW5nIGJlc2VkIG9uIHRlc3QgZGF0YXMKcmVzdWx0cyA8LSBjYmluZChyLnByZWRpY3Rpb25zLHRlc3QkcmQpIApjb2xuYW1lcyhyZXN1bHRzKSA8LSBjKCdwcmVkJywncmVhbCcpCnJlc3VsdHMgPC0gYXMuZGF0YS5mcmFtZShyZXN1bHRzKQp0b196ZXJvIDwtIGZ1bmN0aW9uKHgpeyAgICNkZWFsaW5nIHdpdGggemVybyB2YWx1ZXMKICAgIGlmICAoeCA8IDApewogICAgICAgIHJldHVybigwKQogICAgfWVsc2V7CiAgICAgICAgcmV0dXJuKHgpCiAgICB9Cn0KcmVzdWx0cyRwcmVkIDwtIHNhcHBseShyZXN1bHRzJHByZWQsdG9femVybykKbXNlIDwtIG1lYW4oKHJlc3VsdHMkcmVhbC1yZXN1bHRzJHByZWQpXjIpICAgICNjYWxjdWxhdGlubmcgbWVhbiBzcXVhcnRlZCBlcnJvcgpwcmludChtc2UpClNTRSA9IHN1bSgocmVzdWx0cyRwcmVkIC0gcmVzdWx0cyRyZWFsKV4yKSAgICNjYWxjdWxhdGlubmcgY29lZmZpY2llbnQgb2YgZGV0ZXJtaW5hdGlvbgpTU1QgPSBzdW0oIChtZWFuKGRmX20kcmQpIC0gcmVzdWx0cyRyZWFsKV4yKQoKUjIgPSAxIC0gU1NFL1NTVApSMgpgYGAKI3Nob3dpbmcgd2hpY2ggdmFyaWFibGUgaXMgaW1wb3J0YW50IGJhc2VkIG9uIHJhbmRvbSBmb3Jlc3QgbW9kZWwuIApgYGB7cn0KaW1wb3J0YW5jZShtb2RlbCkKYGBgCgoKI2NsdXN0cmluZyBiYXNlZCBvbiBleHBsYW5hdG9yeSB2YXJpYWJsZXMgKDUgY2x1c3RlcikKYGBge3J9CmttXzUgPC0ga21lYW5zKCBzdWJzZXQoZGZfbSwgc2VsZWN0ID0gLWMoY291bnRyeSwgY29kZSwgd2ViLCBkT3duKSksIDUsbnN0YXJ0ID0gMTApCgpgYGAKI2NsdXN0cmluZyBiYXNlZCBvbiBleHBsYW5hdG9yeSB2YXJpYWJsZXMgKDcgY2x1c3RlcikKYGBge3J9CmttXzcgPC0ga21lYW5zKCBzdWJzZXQoZGZfbSwgc2VsZWN0ID0gLWMoY291bnRyeSwgY29kZSwgd2ViLCBkT3duKSksIDcsIG5zdGFydCA9IDEwKQpgYGAKI2FkZGluZyBwcmVkaWN0ZWQgY2x1c3RlciB0byBkYXRhZnJhbWUKYGBge3J9CmRmX21bIms1Il0gPC0ga21fNSRjbHVzdGVyCmRmX21bIms3Il0gPC0ga21fNyRjbHVzdGVyCmBgYAoKI3Zpc3VhbGl6YXRpb24gb2YgY2x1c3RlcnMKYGBge3J9CmxpYnJhcnkoY2x1c3RlcikgCmNsdXNwbG90KGRmX20sIGRmX20kazUsIGNvbG9yPVRSVUUsIHNoYWRlPVRSVUUsIGxhYmVscz0wLGxpbmVzPTAsICkKY2x1c3Bsb3QoZGZfbSwgZGZfbSRrNywgY29sb3I9VFJVRSwgc2hhZGU9VFJVRSwgbGFiZWxzPTAsbGluZXM9MCwgKQpgYGAKYGBge3J9CmxpYnJhcnkodGlkeXIpCmxpYnJhcnkoZ3JpZCkgCmxpYnJhcnkocndvcmxkbWFwKQpsaWJyYXJ5KG1hcHByb2opCmBgYAoKYGBge3J9CmRmX20KI2pvaW4oZGZfbSwgYXMuZGF0YS5mcmFtZSh3b3JsZE1hcCROQU1FKSx0eXBlID0ncmlnaHQnLCBieSA9IHdvcmxkTWFwJE5BTUUgKQpgYGAKCiN3b3JsZCBtYXAgYmFzZWQgb24gUGVyY2VudCBvZiBmaXJtcyB0aGF0IHNwZW5kIG9uIFImRCB2YXJpYWJsZQpgYGB7cn0Kd29ybGRNYXA8LWdldE1hcCgpICMgd29ybGRtYXAgbGFkZW4KCm1mIDwtIG1lcmdlKGRmX20sIGFzLmRhdGEuZnJhbWUod29ybGRNYXAkSVNPX0EzKSwgIGJ5LnggPSAnY29kZScsIGJ5LnkgPSAid29ybGRNYXAkSVNPX0EzIiwgc29ydCA9IFRSVUUsYWxsLnk9VFJVRSApCm0gPC0gIHdoaWNoKHdvcmxkTWFwJElTT19BMyVpbiVtZiRjb2RlKQoKQ29vcmRzIDwtIGxhcHBseShtLCBmdW5jdGlvbihpKXsKICBmIDwtIGRhdGEuZnJhbWUod29ybGRNYXBAcG9seWdvbnNbW2ldXUBQb2x5Z29uc1tbMV1dQGNvb3JkcykKICBmJHJlZ2lvbiA9YXMuY2hhcmFjdGVyKHdvcmxkTWFwJElTT19BM1tpXSkKICBjb2xuYW1lcyhmKSA8LSBsaXN0KCJsb25nIiwgImxhdCIsICJyZWdpb24iKQogIHJldHVybihmKQp9KQoKQ29vcmRzIDwtIGRvLmNhbGwoInJiaW5kIiwgQ29vcmRzKQoKdHcgPC0gZGF0YS5mcmFtZShjb3VudHJ5ID0gbWYkY29kZSwgdmFsdWUgPSBtZiRyZCkKQ29vcmRzJHZhbHVlMjAxNCA8LSB0dyR2YWx1ZVttYXRjaChDb29yZHMkcmVnaW9uLHR3JGNvdW50cnkpXQoKbXAgPC0gZ2dwbG90KCkgKyBnZW9tX3BvbHlnb24oZGF0YSA9IENvb3JkcywgYWVzKHggPSBsb25nLCB5ID0gbGF0LCBncm91cCA9IHJlZ2lvbiwgZmlsbCA9IHZhbHVlMjAxNCksCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgY29sb3VyID0gImJsYWNrIiwgc2l6ZSA9IDAuMSkgCiAgI2Nvb3JkX21hcCh4bGltID0gYygtMTMsIDM1KSwgIHlsaW0gPSBjKDMyLCA3MSkpCiAgICAgICAgICAgIAoKbXAgPC0gbXAgKyBzY2FsZV9maWxsX2dyYWRpZW50MihuYW1lID0gIlImRCIsIGxvdyA9ICJjb3JhbCIsIG1pZD0id2hpdGUiLCBoaWdoID0gImJsdWUiLCBtaWRwb2ludD0yMCwgc3BhY2U9IkxhYiIsIG5hLnZhbHVlID0gImdyZXk1MCIpCgoKbXAgPC0gbXAgKyB0aGVtZSgjcGFuZWwuZ3JpZC5taW5vciA9IGVsZW1lbnRfbGluZShjb2xvdXIgPSBOQSksIHBhbmVsLmdyaWQubWlub3IgPSBlbGVtZW50X2xpbmUoY29sb3VyID0gTkEpLAogICAgICAgICAgICAgICAjcGFuZWwuYmFja2dyb3VuZCA9IGVsZW1lbnRfcmVjdChmaWxsID0gTkEsIGNvbG91ciA9IE5BKSwKICAgICAgICAgICAgICAgYXhpcy50ZXh0LnggPSBlbGVtZW50X2JsYW5rKCksCiAgICAgICAgICAgICAgIGF4aXMudGV4dC55ID0gZWxlbWVudF9ibGFuaygpLCBheGlzLnRpY2tzLnggPSBlbGVtZW50X2JsYW5rKCksCiAgICAgICAgICAgICAgIGF4aXMudGlja3MueSA9IGVsZW1lbnRfYmxhbmsoKSwgYXhpcy50aXRsZSA9IGVsZW1lbnRfYmxhbmsoKSwKICAgICAgICAgICAgICAgI3JlY3QgPSBlbGVtZW50X2JsYW5rKCksCiAgICAgICAgICAgICAgIHBsb3QubWFyZ2luID0gdW5pdCgwICogYygtMS41LCAtMS41LCAtMS41LCAtMS41KSwgImxpbmVzIikpCm1wCmBgYAojd29ybGQgbWFwIGJhc2VkIG9uIGNsdXN0ZXJzCmBgYHtyfQp3b3JsZE1hcDwtZ2V0TWFwKCkgIyB3b3JsZG1hcCBsYWRlbgoKbWYgPC0gbWVyZ2UoZGZfbSwgYXMuZGF0YS5mcmFtZSh3b3JsZE1hcCRJU09fQTMpLCAgYnkueCA9ICdjb2RlJywgYnkueSA9ICJ3b3JsZE1hcCRJU09fQTMiLCBzb3J0ID0gVFJVRSxhbGwueT1UUlVFICkKbSA8LSAgd2hpY2god29ybGRNYXAkSVNPX0EzJWluJW1mJGNvZGUpCgpDb29yZHMgPC0gbGFwcGx5KG0sIGZ1bmN0aW9uKGkpewogIGYgPC0gZGF0YS5mcmFtZSh3b3JsZE1hcEBwb2x5Z29uc1tbaV1dQFBvbHlnb25zW1sxXV1AY29vcmRzKQogIGYkcmVnaW9uID1hcy5jaGFyYWN0ZXIod29ybGRNYXAkSVNPX0EzW2ldKQogIGNvbG5hbWVzKGYpIDwtIGxpc3QoImxvbmciLCAibGF0IiwgInJlZ2lvbiIpCiAgcmV0dXJuKGYpCn0pCgpDb29yZHMgPC0gZG8uY2FsbCgicmJpbmQiLCBDb29yZHMpCgp0dyA8LSBkYXRhLmZyYW1lKGNvdW50cnkgPSBtZiRjb2RlLCB2YWx1ZSA9IG1mJGs1KQpDb29yZHMkdmFsdWUgPC0gdHckdmFsdWVbbWF0Y2goQ29vcmRzJHJlZ2lvbix0dyRjb3VudHJ5KV0KCm1wIDwtIGdncGxvdCgpICsgZ2VvbV9wb2x5Z29uKGRhdGEgPSBDb29yZHMsIGFlcyh4ID0gbG9uZywgeSA9IGxhdCwgZ3JvdXAgPSByZWdpb24sIGZpbGwgPSB2YWx1ZSksCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgY29sb3VyID0gImJsYWNrIiwgc2l6ZSA9IDAuMSkgCiAgI2Nvb3JkX21hcCh4bGltID0gYygtMTMsIDM1KSwgIHlsaW0gPSBjKDMyLCA3MSkpCiAgICAgICAgICAgIAoKbXAgPC0gbXAgKyBzY2FsZV9maWxsX2dyYWRpZW50MihuYW1lID0gIkNsdXN0ZXJzIiwgbG93ID0gInJlZCIsIG1pZD0id2hpdGUiLCBoaWdoID0gImJsdWUiLCBzcGFjZT0iTGFiIiwgbmEudmFsdWUgPSAiZ3JleTUwIiwgbWlkcG9pbnQgPSAzKQoKCm1wIDwtIG1wICsgdGhlbWUoI3BhbmVsLmdyaWQubWlub3IgPSBlbGVtZW50X2xpbmUoY29sb3VyID0gTkEpLCBwYW5lbC5ncmlkLm1pbm9yID0gZWxlbWVudF9saW5lKGNvbG91ciA9IE5BKSwKICAgICAgICAgICAgICAgI3BhbmVsLmJhY2tncm91bmQgPSBlbGVtZW50X3JlY3QoZmlsbCA9IE5BLCBjb2xvdXIgPSBOQSksCiAgICAgICAgICAgICAgIGF4aXMudGV4dC54ID0gZWxlbWVudF9ibGFuaygpLAogICAgICAgICAgICAgICBheGlzLnRleHQueSA9IGVsZW1lbnRfYmxhbmsoKSwgYXhpcy50aWNrcy54ID0gZWxlbWVudF9ibGFuaygpLAogICAgICAgICAgICAgICBheGlzLnRpY2tzLnkgPSBlbGVtZW50X2JsYW5rKCksIGF4aXMudGl0bGUgPSBlbGVtZW50X2JsYW5rKCksCiAgICAgICAgICAgICAgICNyZWN0ID0gZWxlbWVudF9ibGFuaygpLAogICAgICAgICAgICAgICAjcGxvdC5tYXJnaW4gPSB1bml0KDAgKiBjKC0xLjUsIC0xLjUsIC0xLjUsIC0xLjUpLCAibGluZXMiKQogICAgICAgICAgICAgICApCm1wCmBgYAoKCgojbWFraW5nICBhIG5ldyBkYXRhZnJhbWUgZm9yIG5ldXJhbCBuZXR3b3JrCmBgYHtyfQpkZl9tIDwtIG5hLm9taXQoZGZfbSkKZGZfbiA8LSBzdWJzZXQoZGZfbSwgc2VsZWN0ID0gLWMoY291bnRyeSwgY29kZSwgazUsazcsIGRPd24sIHdlYikpCmBgYAojbWFraW5nIGZvcm11bGEgYmFzZWQgb24gY29sdW1uIG5hbWVzIGZvciB0aGUgbW9kZWwKYGBge3J9Cm4gPC0gbmFtZXMoZGZfbikKIGFzLmZvcm11bGEocGFzdGUoInJkIH4iLCBwYXN0ZShuWyFuICVpbiUgInJkIl0sIGNvbGxhcHNlID0gIiArICIpKSkKYGBgCiNmaW5kaW5nIG1heCBhbmQgbWlucyBmb3IgZWFjaCBjbG91bW4KYGBge3J9Cm1heHMgPC0gYXBwbHkoZGZfbiwgMiwgbWF4KSAKbWlucyA8LSBhcHBseShkZl9uLCAyLCBtaW4pCmBgYAojc2NhbGluZyBjb2x1bW5zCmBgYHtyfQpzY2FsZWQgPC0gYXMuZGF0YS5mcmFtZShzY2FsZShkZl9uLCBjZW50ZXIgPSBtaW5zLCBzY2FsZSA9IG1heHMgLSBtaW5zKSkKYGBgCiNzcGxpdHRpbmcgZGF0YXNldApgYGB7cn0Kc3BsaXQgPSBzYW1wbGUuc3BsaXQoc2NhbGVkJHJkLCBTcGxpdFJhdGlvID0gMC45MCkKCnRyYWluID0gc3Vic2V0KHNjYWxlZCwgc3BsaXQgPT0gVFJVRSkKdGVzdCA9IHN1YnNldChzY2FsZWQsIHNwbGl0ID09IEZBTFNFKQpgYGAKCiN0cmFpbmluZyBtb2RlbApgYGB7cn0KbGlicmFyeShuZXVyYWxuZXQpCm4gPC0gbmFtZXModHJhaW4pCmYgPC0gYXMuZm9ybXVsYShwYXN0ZSgicmQgfiIsIHBhc3RlKG5bIW4gJWluJSAicmQiXSwgY29sbGFwc2UgPSAiICsgIikpKQptb2RlbCA8LSBuZXVyYWxuZXQoZixkYXRhPXRyYWluLGhpZGRlbj1jKDUsMyksbGluZWFyLm91dHB1dD1UUlVFKQoKYGBgCgojcHJlZGljdGluZyBhbmQgcGxvdHRpbmcgcHJlZGljdGVkIGFuZCByZWFsIHZhbHVlcwpgYGB7cn0KcHJlZGljdGVkLm5uLnZhbHVlcyA8LSBjb21wdXRlKG1vZGVsLCB0ZXN0WzE6KGxlbmd0aCh0ZXN0KS0xKV0pCnRydWUucHJlZGljdGlvbnMgPC0gcHJlZGljdGVkLm5uLnZhbHVlcyRuZXQucmVzdWx0KihtYXgoZGZfbiRyZCktbWluKGRmX24kcmQpKSttaW4oZGZfbiRyZCkKdGVzdC5yIDwtICh0ZXN0JHJkKSoobWF4KGRmX24kcmQpLW1pbihkZl9uJHJkKSkrbWluKGRmX24kcmQpCmVycm9yLmRmIDwtIGRhdGEuZnJhbWUodGVzdC5yLHRydWUucHJlZGljdGlvbnMpCmdncGxvdChlcnJvci5kZixhZXMoeD10ZXN0LnIseT10cnVlLnByZWRpY3Rpb25zKSkgKyBnZW9tX3BvaW50KCkgKyBzdGF0X3Ntb290aChtZXRob2QgPSAibG0iKQpgYGAKCgoKCgoK